{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook we explore the `evaluate` function offered by `ranx`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First of all we need to install [ranx](https://github.com/AmenRa/ranx)\n",
    "\n",
    "Mind that the first time you run any of ranx's functions, they may take a while, as they must be compiled first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the `%pip` magic (rather than `!pip`) so that ranx is installed into\n",
    "# the environment of the running kernel\n",
    "%pip install -U ranx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download the data we need"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "\n",
    "DATA_DIR = \"notebooks/data\"\n",
    "BASE_URL = \"https://raw.githubusercontent.com/AmenRa/ranx/master/notebooks/data\"\n",
    "\n",
    "# Create the target folder once, up front, instead of on every loop iteration\n",
    "os.makedirs(DATA_DIR, exist_ok=True)\n",
    "\n",
    "for file in [\"qrels\", \"results\"]:\n",
    "    # Download before opening the output file and fail loudly on HTTP errors,\n",
    "    # so a failed request never leaves an empty file or a saved error page behind\n",
    "    response = requests.get(f\"{BASE_URL}/{file}.test\", timeout=30)\n",
    "    response.raise_for_status()\n",
    "\n",
    "    # Write the raw bytes so the local copy matches the remote file exactly\n",
    "    with open(f\"{DATA_DIR}/{file}.test\", \"wb\") as f:\n",
    "        f.write(response.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ranx import Qrels, Run, evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the qrels and the run from the downloaded TREC-format files\n",
    "qrels_path = \"notebooks/data/qrels.test\"\n",
    "run_path = \"notebooks/data/results.test\"\n",
    "\n",
    "qrels = Qrels.from_file(qrels_path, kind=\"trec\")\n",
    "run = Run.from_file(run_path, kind=\"trec\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Evaluate\n",
    "\n",
    "For a full list of the available metrics see [here](https://amenra.github.io/ranx/metrics/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single metric\n",
    "# Each metric below is evaluated on its own and its score is printed,\n",
    "# in the listed order\n",
    "single_metrics = [\n",
    "    \"hits\",\n",
    "    \"hit_rate\",\n",
    "    \"precision\",\n",
    "    \"recall\",\n",
    "    \"f1\",\n",
    "    \"r-precision\",\n",
    "    \"mrr\",\n",
    "    \"map\",\n",
    "    \"ndcg\",\n",
    "    \"bpref\",\n",
    "    \"rbp.95\",\n",
    "]\n",
    "\n",
    "for metric in single_metrics:\n",
    "    print(evaluate(qrels, run, metric))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single metric with cutoff\n",
    "metric_with_cutoff = \"ndcg@10\"\n",
    "evaluate(qrels, run, metric_with_cutoff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Multiple metrics\n",
    "metrics = [\"map\", \"mrr\", \"ndcg\"]\n",
    "evaluate(qrels, run, metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Multiple metrics with cutoffs (each metric can use a different cutoff)\n",
    "metrics_with_cutoffs = [\"map@100\", \"mrr@10\", \"ndcg@10\"]\n",
    "evaluate(qrels, run, metrics_with_cutoffs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `evaluate` saves the scores in the evaluated Run by default;\n",
    "# pass `save_results_in_run=False` to `evaluate` to disable this behaviour\n",
    "mean_scores = run.mean_scores\n",
    "mean_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json  # Just for pretty printing\n",
    "\n",
    "# 301, 302, and 303 are the query ids\n",
    "pretty_scores = json.dumps(run.scores, indent=4)\n",
    "print(pretty_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternatively, passing `return_mean=False` to `evaluate` lets you extract\n",
    "# the per query scores as NumPy arrays\n",
    "metrics_with_cutoffs = [\"map@100\", \"mrr@10\", \"ndcg@10\"]\n",
    "evaluate(qrels, run, metrics_with_cutoffs, return_mean=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Finally, the number of threads used for computing the metric scores can be\n",
    "# set by passing `threads=n` to `evaluate`\n",
    "# The default is `threads=0`, which means all the available threads are used\n",
    "# Note that `ranx` automatically sets `threads=1` when the number of queries\n",
    "# is small, to prevent performance degradations\n",
    "metrics_with_cutoffs = [\"map@100\", \"mrr@10\", \"ndcg@10\"]\n",
    "evaluate(qrels, run, metrics_with_cutoffs, threads=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "692976b4e7dc85f192e7ff7f01132f0a9cfec5b2147f9c392e0014b728af08ff"
  },
  "kernelspec": {
   "display_name": "Python 3.7.12 ('ranx')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
